#############################################################
####            International Interactions, 2018         ####
#### Hannes Weber: Age structure and political violence  ####
####                  Replication code                   ####
####                Updated: 12 Jul 2018                 ####
#############################################################


setwd("C:/Data") #Please change if data file is located in different folder.

#Skip the following two lines if these packages are already installed.
install.packages(c("stargazer", "plm", "pglm", "Amelia", "extrafont", "ggthemes", 
                   "ggplot2", "cowplot", "rgdal", "maptools", "nFactors", "sp"))

rm(list=ls())
library(stargazer)
library(plm)
library(pglm)
library(Amelia)
library(extrafont)
library(ggthemes)
library(ggplot2)
library(cowplot)
library(rgdal)        
library(maptools)
library(sp)

#Make sure the data file is located in the folder specified above in line 9
load("Weber2018_replication_data.RData")

#Please note: All information on data sources can be found in the paper.


#####################################################################################
#Figure 1

Sys.setlocale("LC_ALL", "English_United States.1252")

cols <- c("#ffffb2", "#fecc5c", "#fd8d3c", "#f03b20","#bd0026")
breaks <- c(0,23,34,41,47,100)

png(file="Figure 1.png",width=4000,height=3000,res=600)
f <- par(family="Garamond", cex.main=1)
plot(karte, col = cols[findInterval(kart$YouthRatio,breaks, all.inside = TRUE)], 
     border = NA)
plot(karte, lty=1, lwd=0.3, add = TRUE)
legend("bottom", legend = leglabs(round(breaks, digits = 1), under = "Less than",
                                  over = "Greater than", between = " - "), 
       fill = cols, bty = "n", cex = 0.6, ncol=5,
       title = "Youth ratio in 2015 (%):")
title("Figure 1: Youth ratio in 182 countries of the world, 2015", cex.main=.8)
par(f)
dev.off()



###########################################################################################
#Figure 2


#Iran
example <- daten[daten$Country=="IRAN, ISLAMIC REP.",c(4,8,9,17,20,21,50)]
example$YouthRatio <- (example$YouthRatio/100)-.2
example$EduM2429Secondary <- example$EduM2429Secondary-.2
example$EduM2429Tertiary <- example$EduM2429Tertiary-.2
example$ucdp_fatalities <- (example$ucdp_fatalities/max(example$ucdp_fatalities))/3

a <- ggplot(data=example, aes(x=Year)) + 
  geom_col(aes(y=ucdp_fatalities, fill="Deaths from political violence", alpha=.3)) +
  geom_line(aes(y=YouthRatio, linetype="Youth Ratio")) +
  geom_line(aes(y=EduM2429Secondary, linetype="Education")) +
  #geom_line(aes(y=UnempMaleYouthILO, linetype="Unemployment")) +
  scale_linetype_manual(values = c("dashed", "solid")) +
  scale_colour_manual(values = c("black", "black")) + 
  scale_fill_manual(values=c("grey"))+
  scale_alpha_continuous(guide=F)+
  scale_y_continuous(name="Youth ratio/ Sec. education", breaks=c(0,.1,.2,.3,.4),limits=c(0,.5),
                     labels=c("0.2", "0.3", "0.4", "0.5","0.6"),expand=c(0,0))+xlab("") +
  theme_classic(base_size=12) +
  theme(aspect.ratio = 0.25, axis.title.y=element_text(size=10),
        legend.position=("top"),legend.title = element_blank(),
        axis.text.x = element_text(angle=90, family="Garamond"),
        text=element_text(family="Garamond"))

b <- ggplot(data=example, aes(x=Year, y=ucdp_fatalities)) + 
  geom_blank() +  scale_y_continuous(name="Deaths from political violence",
                                     breaks = c(0,.15,.3),position="right",
                                     labels = c("0", "100", "200"),expand=c(0,0)) +
  xlab("") +  theme_classic(base_size=12) +
  theme(aspect.ratio = 0.25,axis.title.y=element_text(size=10),
        text=element_text(family="Garamond"),
        axis.ticks.x = element_blank(),axis.title.x=element_blank(),
        axis.text.x = element_blank(),axis.line.x=element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank(),
        plot.background = element_rect(fill = "transparent",colour = NA))


#Colombia
example <- daten[daten$Country=="COLOMBIA",c(4,8,9,17,20,21,50)]
head(example)
example$YouthRatio <- (example$YouthRatio/100)-.2
example$EduM2429Secondary <- example$EduM2429Secondary-.2
example$EduM2429Tertiary <- example$EduM2429Tertiary-.2
example$ucdp_fatalities <- (example$ucdp_fatalities/max(example$ucdp_fatalities))/3

c <- ggplot(data=example, aes(x=Year)) + 
  geom_col(aes(y=ucdp_fatalities, fill="Deaths from political violence", alpha=.3)) +
  geom_line(aes(y=YouthRatio, linetype="Youth Ratio")) +
  geom_line(aes(y=EduM2429Secondary, linetype="Education")) +
  scale_linetype_manual(values = c("dashed", "solid")) +
  scale_colour_manual(values = c("black", "black")) + 
  scale_fill_manual(values=c("grey"))+
  scale_alpha_continuous(guide=F)+
  scale_y_continuous(name="Youth ratio/ Sec. education", breaks=c(0,.1,.2,.3),limits=c(0,.4),
                     labels=c("0.2", "0.3", "0.4", "0.5"),expand=c(0,0))+xlab("") +
  theme_classic(base_size=12) +
  theme(aspect.ratio = 0.25, axis.title.y=element_text(size=10),
        legend.position=("top"),legend.title = element_blank(),
        axis.text.x = element_text(angle=90, family="Garamond"),
        text=element_text(family="Garamond"))

d <- ggplot(data=example, aes(x=Year, y=ucdp_fatalities)) + 
  geom_blank() +  scale_y_continuous(name="Deaths from political violence",
                     breaks = c(0.014,.15,.3),position="right",
                     labels = c("0", "1,500", "3,000"),expand=c(0,0)) +
  xlab("") +  theme_classic(base_size=12) +
  theme(aspect.ratio = 0.25,axis.title.y=element_text(size=10),
        text=element_text(family="Garamond"),
        axis.ticks.x = element_blank(),axis.title.x=element_blank(),
        axis.text.x = element_blank(),axis.line.x=element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank(),
        plot.background = element_rect(fill = "transparent",colour = NA))


#Chad
example <- daten[daten$Country=="CHAD",c(4,8,9,17,20,21,50)]
example$YouthRatio <- (example$YouthRatio/100)-.2
example$EduM2429Secondary <- example$EduM2429Secondary-.2
example$EduM2429Tertiary <- example$EduM2429Tertiary-.2
example$ucdp_fatalities <- (example$ucdp_fatalities/max(example$ucdp_fatalities))/3

e <- ggplot(data=example, aes(x=Year)) + 
  geom_col(aes(y=ucdp_fatalities, fill="Deaths from political violence", alpha=.3)) +
  geom_line(aes(y=YouthRatio, linetype="Youth Ratio")) +
  geom_line(aes(y=EduM2429Secondary, linetype="Education")) +
  scale_linetype_manual(values = c("dashed", "solid")) +
  scale_colour_manual(values = c("black", "black")) + 
  scale_fill_manual(values=c("grey"))+
  scale_alpha_continuous(guide=F)+
  scale_y_continuous(name="Youth ratio/ Sec. education", breaks=c(0,.1,.2,.3),limits=c(0,.4),
                     labels=c("0.2", "0.3", "0.4", "0.5"),expand=c(0,0))+xlab("") +
  theme_classic(base_size=12) +
  theme(aspect.ratio = 0.25, axis.title.y=element_text(size=10),
        legend.position=("top"),legend.title = element_blank(),
        axis.text.x = element_text(angle=90, family="Garamond"),
        text=element_text(family="Garamond"))

f <- ggplot(data=example, aes(x=Year, y=ucdp_fatalities)) + 
  geom_blank() +  scale_y_continuous(name="Deaths from political violence",
                                     breaks = c(0,.15,.3),position="right",
                                     labels = c("0", "1,000", "2,000"),expand=c(0,0)) +
  xlab("") +  theme_classic(base_size=12) +
  theme(aspect.ratio = 0.25,axis.title.y=element_text(size=10),
        text=element_text(family="Garamond"),
        axis.ticks.x = element_blank(),axis.title.x=element_blank(),
        axis.text.x = element_blank(),axis.line.x=element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank(),
        plot.background = element_rect(fill = "transparent",colour = NA))


png(file="Figure 2.png",width=8000,height=12000,res=1200)
ggdraw(xlim=c(0,1.2),ylim=c(0,3))+
  draw_plot(e,0,0)+draw_plot(f,.077,-.016)+
  draw_label("Chad",.6,.9,fontfamily="Garamond")+
  draw_plot(a,0,.9)+draw_plot(b,.070,.888)+
  draw_label("Iran",.6,1.8,fontfamily="Garamond") +
  draw_plot(c,0,1.8)+draw_plot(d,.077,1.784)+
  draw_label("Colombia",.6,2.7,fontfamily="Garamond")+
  draw_label("Figure 2. Youth ratio, secondary education \namong youth and political violence in three countries",
             .6,2.9,fontfamily="Garamond",fontface="bold") 
dev.off()


###########################################################################################
#Summary statistics table for appendix
###########################################################################################

daten_sum <- daten[,c(1,4,6:22,24,26,28:42,50:52,55:56)]

stargazer(daten_sum, type="html", out="Table A1.htm")


###########################################################################################
#Multiple Imputation
###########################################################################################

set.seed(98765)
ids=c("CASEID","ID","CountryYear","AdverseRegimeChange","RegimeType","WaterStress",
      "EduM2429Tertiary","WorldRegion","period","CountryPeriod","PeriodID","RegimeTypePeriod")
ord=c("FH","polity2","parcomp","exrec")

a.out <- amelia(x = daten, m=10, cs = "Country", ts = "Year", 
                logs=c("GDP_cap","InfMort","GDP_t_20", "ucdp_fatalities"),
                idvars=ids, ords=ord)


###########################################################################################
#Panel models, dependent variable = fatalities from political violence (UCDP)
###########################################################################################

#Drop Countries that have only zeros
agg <- aggregate(ucdp_fatalities~Country, daten, sum)
zeros <- agg$Country[agg$ucdp_fatalities == 0]
ucpd_daten <- daten[!(daten$Country %in% zeros),]
ucpd_daten <- ucpd_daten[!is.na(ucpd_daten$ucdp_fatalities),]
ucpd_paneldaten <- plm.data(ucpd_daten, index=c("Country","Year"))

#Count models: Negative binomial models, without multiple imputation, fixed effects
#LDV
nb1l <- pglm(ucdp_fatalities ~ lag(ucdp_fatalities,1)+YouthRatio, 
            data=ucpd_paneldaten, family=negbin, model="within", effects="twoways", method="nr",
            print.level=3)

#Fixed effects
nb1 <- pglm(ucdp_fatalities ~ YouthRatio, 
            data=ucpd_paneldaten, family=negbin, model = "within", effects="twoways",method="nr",print.level=3)

nb2 <- pglm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+
              GDP_growth+log(Pop)+factor(RegimeType),
            data=ucpd_paneldaten, family=negbin, model = "within", effects="twoways",method="nr",
            print.level=3)

#With factor scores for economic development and age structure
library(nFactors)

mi1 <- subset(ucpd_paneldaten, select=c("YouthRatio", "TFR_t_20", "GDP_cap","InfMort", "Urban"))
mi1$GDP_cap <- log(mi1$GDP_cap)
faktor <- fa(mi1, nfactors=2,fm="minres", rotate="varimax", scores="regression")

ucpd_paneldaten$factor_demography <- as.numeric(scale(faktor$scores[,2]))
ucpd_paneldaten$factor_development <- as.numeric(scale(faktor$scores[,1]))

nb3a <- pglm(ucdp_fatalities ~ factor_demography+factor_development+
              GDP_growth+log(Pop)+factor(RegimeType),
            data=ucpd_paneldaten, family=negbin, model = "within", effects="twoways",method="nr",
            print.level=3)


#with multiple imputation
m <- 10
b.out <- NULL
se.out <- NULL
for(i in 1:m){
  mi_paneldaten <- plm.data(a.out$imputations[[i]], index=c("Country","Year"))
  nb4 <- pglm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+
                  GDP_growth+log(Pop)+factor(RegimeType),
                data=mi_paneldaten, family=negbin, model = "within", effects="twoways",method="nr",
              print.level=3)
  b.out <- rbind(b.out, summary(nb4)$estimate[,1])
  se.out <- rbind(se.out, summary(nb4)$estimate[,2])
}
nb4m <- mi.meld(q=b.out, se=se.out)
nb4m$t <- nb4m$q/nb4m$se


#with dependent variable = fatalities from Global Terrorism Database 
agg2 <- aggregate(gtd_fatalities~Country, daten, sum)
zeros2 <- agg2$Country[agg2$gtd_fatalities == 0]
gtd_daten <- daten[!(daten$Country %in% zeros2),]
gtd_daten <- gtd_daten[!is.na(gtd_daten$gtd_fatalities),]
gtd_paneldaten <- plm.data(gtd_daten, index=c("Country","Year"))
nb5 <- pglm(gtd_fatalities ~ YouthRatio+log(GDP_cap)+
              GDP_growth+log(Pop)+factor(RegimeType),
            data=gtd_paneldaten, family=negbin, model = "within", effects="twoways",method="nm",
            print.level=3)


#Fake plm-objects, because stargazer() does not seem to recognize pglm objects
f2 <- plm(ucdp_fatalities ~ lag(ucdp_fatalities,1)+YouthRatio, data=ucpd_paneldaten, model="pooling")
f3 <- plm(ucdp_fatalities ~ YouthRatio, data=ucpd_paneldaten, model="pooling")
f4 <- plm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+GDP_growth+log(Pop)+factor(RegimeType),
          data=ucpd_paneldaten, model="pooling")
f4a <- plm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+GDP_growth+log(Pop)+factor(RegimeType),
          data=ucpd_paneldaten, model="pooling")
f5 <- plm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+GDP_growth+log(Pop)+factor(RegimeType),
          data=mi_paneldaten, model="pooling")
f6 <- plm(gtd_fatalities ~ YouthRatio+log(GDP_cap)+GDP_growth+log(Pop)+factor(RegimeType),
          data=gtd_paneldaten, model="pooling")

#Export table - correct log-likelihood and information about fixed-effects at bottom by hand
stargazer(f2,f3,f4,f5,f4a,f6,type="html",
          title="Predictors of political violence, 1996-2015 (negative binomial panel models)",
          covariate.labels=c("Intercept", "Violent deaths (t-1)", "Youth ratio", 
                             "log(GDP per capita)",  "GDP growth", "log(Population)", 
                             "Regime type: full autocracy", "Regime type: partial autocracy", 
                             "Regime type: partial democracy", "Regime type: partial democracy with factionalism", 
                             "Regime type: full democracy"),
          out="Table 1.htm", star.cutoffs = c(0.05, 0.01, 0.001),
          dep.var.labels.include = FALSE,
          dep.var.caption = ("Model:"),
          column.labels = c("LDV", "Fixed effects", "FE + Controls", "With MI", "With factor scores", "DV = Terrorist victims"),
          coef=list(nb1l$estimate[1:3],nb1$estimate[1:2],nb2$estimate[1:10],nb4m$q.mi[1:10],unname(nb3a$estimate[1:10]),nb5$estimate[1:10]),
          se=list(summary(nb1l)$estimate[4:6],summary(nb1)$estimate[3:4],summary(nb2)$estimate[11:20],nb4m$se.mi[1:10],summary(nb3a)$estimate[11:20],summary(nb5)$estimate[11:20]),
          p.auto=T, notes.append=F, df=F, initial.zero=F,
          notes="LDV = lagged dependent variable. MI = Multiple imputation of missing data. FE = Fixed effects (country and year). Reference group for regime type: other or missing. In column (5), youth ratio and log(GDP per capita) are measured by multi-item factor scores, see text for details. Data sources: see text. *p<.05, **p<.01, ***p<.001",
          omit.stat = c("rsq", "f"), intercept.bottom=F,
          omit=c("factor_demography", "factor_development"), omit.labels=c("Time fixed-effects", "Country fixed-effects")
)



###########################################################################################
#Split-sample analyses with interaction effects
###########################################################################################

int_daten <- ucpd_paneldaten
int_daten$YouthRatio <- scale(int_daten$YouthRatio)
int_daten$EduM2429Secondary <- scale(int_daten$EduM2429Secondary)
int_daten$EduM2429Tertiary <- scale(int_daten$EduM2429Tertiary)
int_daten$UnempMaleYouthILO <- scale(int_daten$UnempMaleYouthILO)

#Mean-Split: Panel high-education countries
int_daten1 <- int_daten[int_daten$EduM2429Secondary>mean(int_daten$EduM2429Secondary, na.rm=T)&!is.na(int_daten$EduM2429Secondary),]
r1 <- pglm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+GDP_growth+log(Pop),print.level=3,
           data=int_daten1, family=negbin, model = "within", effects="twoways",method="nr")
r1a <- pglm(ucdp_fatalities ~ YouthRatio*UnempMaleYouthILO+log(GDP_cap)+GDP_growth+log(Pop),print.level=3,
           data=int_daten, family=negbin, model = "within", effects="twoways",method="nr")

#Mean-Split: Panel low-education countries
int_daten2 <- int_daten[int_daten$EduM2429Secondary<=mean(int_daten$EduM2429Secondary, na.rm=T)&!is.na(int_daten$EduM2429Secondary),]
r2 <- pglm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+GDP_growth+log(Pop),print.level=3,
           data=int_daten2, family=negbin, model = "within", effects="twoways",method="nr")
r2a <- pglm(ucdp_fatalities ~ YouthRatio*UnempMaleYouthILO+log(GDP_cap)+GDP_growth+log(Pop),print.level=3,
           data=int_daten2, family=negbin, model = "within", effects="twoways",method="nr")

#Mean-Split: Panel high-education countries (tertiary education)
int_daten3 <- int_daten[int_daten$EduM2429Tertiary>mean(int_daten$EduM2429Tertiary, na.rm=T)&!is.na(int_daten$EduM2429Tertiary),]
r3 <- pglm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+GDP_growth+log(Pop),print.level=3,
           data=int_daten3, family=negbin, model = "within", effects="twoways",method="nm")
r3a <- pglm(ucdp_fatalities ~ YouthRatio*UnempMaleYouthILO+log(GDP_cap)+GDP_growth+log(Pop),print.level=3,
            data=int_daten3, family=negbin, model = "within", effects="twoways",method="sann")

#Mean-Split: Panel low-education countries (tertiary education)
int_daten4 <- int_daten[int_daten$EduM2429Tertiary<=mean(int_daten$EduM2429Tertiary, na.rm=T)&!is.na(int_daten$EduM2429Tertiary),]
r4 <- pglm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+GDP_growth+log(Pop),print.level=3,
           data=int_daten4, family=negbin, model = "within", effects="twoways",method="nm")
r4a <- pglm(ucdp_fatalities ~ YouthRatio*UnempMaleYouthILO+log(GDP_cap)+GDP_growth+log(Pop),print.level=3,
            data=int_daten4, family=negbin, model = "within", effects="twoways",method="nm")

#Mean-Split Unemployment: Panel high unemployment
int_daten5 <- int_daten[int_daten$UnempMaleYouthILO>mean(int_daten$UnempMaleYouthILO, na.rm=T)&!is.na(int_daten$UnempMaleYouthILO),]
r5 <- pglm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+GDP_growth+log(Pop),print.level=3,
           data=int_daten5, family=negbin, model = "within", effects="twoways",method="nm")
r5a <- pglm(ucdp_fatalities ~ YouthRatio*EduM2429Secondary+log(GDP_cap)+GDP_growth+log(Pop),print.level=3,
           data=int_daten5, family=negbin, model = "within", effects="twoways",method="sann")
r5b <- pglm(ucdp_fatalities ~ YouthRatio*EduM2429Tertiary+log(GDP_cap)+GDP_growth+log(Pop),print.level=3,
            data=int_daten5, family=negbin, model = "within", effects="twoways",method="sann")

#Mean-Split Unemployment: Panel low unemployment
int_daten6 <- int_daten[int_daten$UnempMaleYouthILO<=mean(int_daten$UnempMaleYouthILO, na.rm=T)&!is.na(int_daten$UnempMaleYouthILO),]
r6 <- pglm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+GDP_growth+log(Pop),print.level=3,
           data=int_daten6, family=negbin, model = "within", effects="twoways",method="nr")
r6a <- pglm(ucdp_fatalities ~ YouthRatio*EduM2429Secondary+log(GDP_cap)+GDP_growth+log(Pop),print.level=3,
           data=int_daten6, family=negbin, model = "within", effects="twoways",method="sann")
r6b <- pglm(ucdp_fatalities ~ YouthRatio*EduM2429Tertiary+log(GDP_cap)+GDP_growth+log(Pop),print.level=3,
            data=int_daten6, family=negbin, model = "within", effects="twoways",method="sann")

#Fake plm-objects
p1 <- plm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+GDP_growth+log(Pop),data=int_daten1, model="pooling")
p1a <- plm(ucdp_fatalities ~ YouthRatio*UnempMaleYouthILO+log(GDP_cap)+GDP_growth+log(Pop),data=int_daten1, model="pooling")
p2 <- plm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+GDP_growth+log(Pop),data=int_daten2, model="pooling")
p2a <- plm(ucdp_fatalities ~ YouthRatio*UnempMaleYouthILO+log(GDP_cap)+GDP_growth+log(Pop),data=int_daten2, model="pooling")
p3 <- plm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+GDP_growth+log(Pop),data=int_daten3, model="pooling")
p3a <- plm(ucdp_fatalities ~ YouthRatio*UnempMaleYouthILO+log(GDP_cap)+GDP_growth+log(Pop),data=int_daten3, model="pooling")
p4 <- plm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+GDP_growth+log(Pop),data=int_daten4, model="pooling")
p4a <- plm(ucdp_fatalities ~ YouthRatio*UnempMaleYouthILO+log(GDP_cap)+GDP_growth+log(Pop),data=int_daten4, model="pooling")
p5 <- plm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+GDP_growth+log(Pop),data=int_daten5, model="pooling")
p5a <- plm(ucdp_fatalities ~ YouthRatio*EduM2429Secondary+log(GDP_cap)+GDP_growth+log(Pop),data=int_daten5, model="pooling")
p5b <- plm(ucdp_fatalities ~ YouthRatio*EduM2429Tertiary+log(GDP_cap)+GDP_growth+log(Pop),data=int_daten5, model="pooling")
p6 <- plm(ucdp_fatalities ~ YouthRatio+log(GDP_cap)+GDP_growth+log(Pop),data=int_daten6, model="pooling")
p6a <- plm(ucdp_fatalities ~ YouthRatio*EduM2429Secondary+log(GDP_cap)+GDP_growth+log(Pop),data=int_daten6, model="pooling")
p6b <- plm(ucdp_fatalities ~ YouthRatio*EduM2429Tertiary+log(GDP_cap)+GDP_growth+log(Pop),data=int_daten6, model="pooling")

#Export table - Correct LogLikelihood etc by hand
stargazer(p1,p1a,p2,p2a,p3,p3a,p4,p4a,type="html",
          title="Table II. Split-sample analyses of political violence, by level of education, 1996-2015 (negative binomial panel models)",
          covariate.labels=c("Intercept", "Youth ratio", "Youth unemployment (males)",
                             "log(GDP per capita)",  "GDP growth", "log(Population)", 
                             "Youth ratio*Youth Unemployment"), 
          out="Table 2.htm", star.cutoffs = c(0.05, 0.01, 0.001),
          dep.var.labels.include = FALSE,
          dep.var.caption = ("Sample:"),
          column.labels = c("Secondary education", "Tertiary education"),
          column.separate = c(4,2),
          coef=list(r1$estimate[1:5],r1a$estimate[1:7],r2$estimate[1:5],r2a$estimate[1:7],r3$estimate[1:5],r3a$estimate[1:7],r4$estimate[1:5],r4a$estimate[1:7]),
          se=list(summary(r1)$estimate[6:10],summary(r1a)$estimate[8:14],summary(r2)$estimate[6:10],summary(r2a)$estimate[8:14],summary(r3)$estimate[6:10],summary(r3a)$estimate[8:14],summary(r4)$estimate[6:10],summary(r4a)$estimate[8:14]),
          p.auto=T, notes.append=F, df=F, initial.zero=F,
          notes="Dependent variable = yearly number of conflict-related deaths (UCDP event data). For interaction effects, variables are scaled to mean = 0 and standard deviation = 1. Data sources: see text. *p<.05, **p<.01, ***p<.001",
          omit.stat = c("rsq", "f"), intercept.bottom=F,
          omit=c("Year", "Country"), omit.labels=c("Time fixed-effects", "Country fixed-effects")
)

stargazer(p5,p5a,p5b,p6, p6a, p6b,type="html",
          title="Table III. Split-sample analyses of political violence, by unemployment rate, 1996-2015 (negative binomial panel models)",
          covariate.labels=c("Intercept", "Youth ratio", "Male youth secondary education", "Male youth tertiary education",
                             "log(GDP per capita)",  "GDP growth", "log(Population)", 
                             "Youth ratio*Secondary education","Youth ratio*Tecondary education" ), 
          out="Table 3.htm", star.cutoffs = c(0.05, 0.01, 0.001),
          dep.var.labels.include = FALSE,
          dep.var.caption = ("Sample: Unemployment rate among young males"),
          column.labels = c(rep("Higher",3), rep("Lower",3)),
          coef=list(r5$estimate[1:5],r5a$estimate[1:7],r5b$estimate[1:7],r6$estimate[1:5],r6a$estimate[1:7],r6b$estimate[1:7]),
          se=list(summary(r5)$estimate[6:10],summary(r5a)$estimate[8:14],summary(r5b)$estimate[8:14],summary(r6)$estimate[6:10],summary(r6a)$estimate[8:14],summary(r6b)$estimate[8:14]),
          p.auto=T, notes.append=F, df=F, initial.zero=F,
          notes="Dependent variable = yearly number of conflict-related deaths (UCDP event data). For interaction effects, variables are scaled to mean = 0 and standard deviation = 1. Data sources: see text. *p<.05, **p<.01, ***p<.001",
          omit.stat = c("rsq", "f"), intercept.bottom=F,
          omit=c("Year", "Country"), omit.labels=c("Time fixed-effects", "Country fixed-effects")
)


#########################################################################
## Plots for interaction effects 
#########################################################################

int2 <- pglm(ucdp_fatalities ~ YouthRatio*EduM2429Secondary+log(GDP_cap)+
               GDP_growth+log(Pop), 
             data=int_daten, family=negbin, model = "within", method="nr", print.level=3)

int2a <- pglm(ucdp_fatalities ~ YouthRatio*EduM2429Tertiary+log(GDP_cap)+
               GDP_growth+log(Pop), 
             data=int_daten, family=negbin, model = "within", method="nr", print.level=3)

int3 <- pglm(ucdp_fatalities ~ YouthRatio*UnempMaleYouthILO+log(GDP_cap)+
               GDP_growth+log(Pop), 
             data=int_daten, family=negbin, model = "within", method="nr", print.level=3)


##Code for interaction plot inspired by Anton Strezhnev, see http://www.causalloop.blogspot.com/2013/06/marginal-effect-plots-for-interaction.html
##seems as if not (yet) published as R package
interaction_plot_continuous <- function(model, effect, moderator, interaction, varcov="default", x_at=NULL,x_labels=TRUE, minimum="min", maximum="max", incr="default", num_points = 50, conf=.95, mean=FALSE, median=FALSE, alph=80, rugplot=T, histogram=T, title="Marginal effects plot", xlabel="Value of moderator", ylabel="Estimated marginal coefficient"){
  makeTransparent<-function(someColor, alpha=alph){
    newColor<-col2rgb(someColor)
    apply(newColor, 2, function(curcoldata){rgb(red=curcoldata[1], green=curcoldata[2],
                                                blue=curcoldata[3],alpha=alpha, maxColorValue=255)})
  }
  if (varcov == "default"){
    covMat = cm
  }else{
    covMat = varcov
  }
  mod_frame = model.frame(model)
  beta_1 = model$coefficients[[effect]]
  beta_3 = model$coefficients[[interaction]]
  if (minimum == "min"){
    min_val = min(mod_frame[[moderator]])
  }else{
    min_val = minimum
  }
  if (maximum == "max"){
    max_val = max(mod_frame[[moderator]])
  }else{
    max_val = maximum
  }
  if (min_val > max_val){
    stop("Error: Minimum moderator value greater than maximum value.")
  }
  if (incr == "default"){
    increment = (max_val - min_val)/(num_points - 1)
  }else{
    increment = incr
  }
  x_2 <- seq(from=min_val, to=max_val, by=increment)
  delta_1 = beta_1 + beta_3*x_2
  var_1 = covMat[effect,effect] + (x_2^2)*covMat[interaction, interaction] + 2*x_2*covMat[effect, interaction]
  se_1 = sqrt(var_1)
  z_score = qnorm(1 - ((1 - conf)/2))
  upper_bound = delta_1 + z_score*se_1
  lower_bound = delta_1 - z_score*se_1
  max_y = max(upper_bound)
  min_y = min(lower_bound)
  hist_col = makeTransparent("darkgrey")
  plot(x=c(), y=c(), xaxt="n",ylim=c(min_y, max_y), xlim=c(min_val, max_val), xlab=xlabel, ylab=ylabel, main=title)
  lines(y=delta_1, x=x_2)
  lines(y=upper_bound, x=x_2, lty=2)
  lines(y=lower_bound, x=x_2, lty=2)
  axis(side=1, at=x_at,labels=x_labels)
  abline(h=0, lty=3)
  if (histogram){
    par(new=T)
    hist(mod_frame[[moderator]], axes=F, xlab="", ylab="",main="", border=hist_col, col=hist_col)
}}


#Fake model to glm object
gdaten <- daten[!(daten$Country %in% zeros),] #again: drop countries without any incidents
gdaten <- gdaten[!is.na(ucpd_daten$ucdp_fatalities),]

gdaten$YouthRatio <- scale(gdaten$YouthRatio)
gdaten$EduM2429Secondary <- scale(gdaten$EduM2429Secondary)
gdaten$EduM2429Tertiary <- scale(gdaten$EduM2429Tertiary)
gdaten$UnempMaleYouthILO <- scale(gdaten$UnempMaleYouthILO)


#Education
imod2 <- glm(ucdp_fatalities ~ YouthRatio*EduM2429Secondary, family=poisson, data=gdaten)
imod2$coefficients[[1]] <- int2$estimate[[1]]
imod2$coefficients[[2]] <- int2$estimate[[2]]
imod2$coefficients[[3]] <- int2$estimate[[3]]
imod2$coefficients[[4]] <- int2$estimate[[7]]
vc2 <- vcov(int2)[c(1,2,3,7), c(1,2,3,7)]

#Education (tertiary)
imod2a <- glm(ucdp_fatalities ~ YouthRatio*EduM2429Tertiary, family=poisson, data=gdaten)
imod2a$coefficients[[1]] <- int2a$estimate[[1]]
imod2a$coefficients[[2]] <- int2a$estimate[[2]]
imod2a$coefficients[[3]] <- int2a$estimate[[3]]
imod2a$coefficients[[4]] <- int2a$estimate[[7]]
vc2a <- vcov(int2a)[c(1,2,3,7), c(1,2,3,7)]

#Unemployment
imod3 <- glm(ucdp_fatalities ~ YouthRatio*UnempMaleYouthILO, family=poisson, data=gdaten)
imod3$coefficients[[1]] <- int3$estimate[[1]]
imod3$coefficients[[2]] <- int3$estimate[[2]]
imod3$coefficients[[3]] <- int3$estimate[[3]]
imod3$coefficients[[4]] <- int3$estimate[[7]]
vc3 <- vcov(int3)[c(1,2,3,7), c(1,2,3,7)]

#Translating the scaled x-axes to meaningful values
c(mean(ucpd_paneldaten$EduM2429Secondary, na.rm=T),sd(ucpd_paneldaten$EduM2429Secondary, na.rm=T))
c(mean(ucpd_paneldaten$EduM2429Tertiary, na.rm=T),sd(ucpd_paneldaten$EduM2429Tertiary, na.rm=T))
c(mean(ucpd_paneldaten$UnempMaleYouthILO, na.rm=T),sd(ucpd_paneldaten$UnempMaleYouthILO, na.rm=T))
edusec_lab <- c("8%", "27%", "47%", "66%","86%")
edutert_lab <- c("4%", "12%", "20%", "29%", "37%")
unemp_lab <- c("4%", "16%", "28%", "39%", "51%")

png("Figure 3.png", width=4000, height=8000,res=800, family="Garamond")
par(family="Garamond",mfrow=c(3,1),cex.lab=1,cex.main=1.2,oma = c(0, 0, 2, 0))
edu_plot <- interaction_plot_continuous(imod2, varcov=vc2,x_labels=edusec_lab,x_at=c(-2,-1,0,1,2),maximum=2,
                                        effect="YouthRatio", moderator="EduM2429Secondary", interaction="YouthRatio:EduM2429Secondary", 
                                        mean=T, title="by secondary education attainment",
                                        xlabel="Male (24-29 years) secondary education", ylabel="Marginal effect of youth bulge")
edu_plot2 <- interaction_plot_continuous(imod2a, varcov=vc2a,x_labels=edutert_lab,x_at=c(-1,0,1,2,3),
                                        effect="YouthRatio", moderator="EduM2429Tertiary", interaction="YouthRatio:EduM2429Tertiary", 
                                        mean=T, title="by tertiary education attainment",
                                        xlabel="Male (24-29 years) tertiary education", ylabel="Marginal effect of youth bulge")
unemp_plot <- interaction_plot_continuous(imod3, varcov=vc3,maximum=3,x_labels=unemp_lab,x_at=c(-1,0,1,2,3),
                                          effect="YouthRatio", moderator="UnempMaleYouthILO", interaction="YouthRatio:UnempMaleYouthILO", 
                                          mean=T, title="by unemployment rate",
                                          xlabel="Unemployment rate among male youth", ylabel="Marginal effect of youth bulge")
mtext(expression(bold("Figure 3: Impact of youth bulge on political violence")), outer = TRUE, cex = 1.2)
dev.off()



#########################################################################
## Robustness: Simulations with randomly drawn controls
#########################################################################

#Note: Estimation at times does not converge, therefore this may take some time.
#Data file provides lists (ts_0, ts_1, ts_2) with succesful runs.
#Alternatively, rename objects "new_ts_[...]" to "ts_[...]" in the following and start over again.


# (1) Main effect for YouthRatio

#List of control variables - only time-variant - no GINI because many NAs
controls <- names(daten)[c(6,9,11:17,19:28,31,33:36,44:45,55:56)]

new_ts_0 <- NULL

#If estimation fails to converge and loop aborts with error, repeat the following until length(ts_0)>1,000
for (i in 1:2000){
  paneldaten <- ucpd_paneldaten
  
  #Randomly select 3 out of the 28 possible control variables
  cs <- sample(controls,3,replace=F)
  paneldaten$a <- paneldaten[,names(paneldaten) %in% cs[1]]
  paneldaten$b <- paneldaten[,names(paneldaten) %in% cs[2]]
  paneldaten$c <- paneldaten[,names(paneldaten) %in% cs[3]]
  if(cs[1]=="GDP_cap") paneldaten$a <- log1p(paneldaten$a)
  if(cs[2]=="GDP_cap") paneldaten$b <- log1p(paneldaten$b)
  if(cs[3]=="GDP_cap") paneldaten$c <- log1p(paneldaten$c)
  if(cs[1]=="GDP_t_20") paneldaten$a <- log1p(paneldaten$a)
  if(cs[2]=="GDP_t_20") paneldaten$b <- log1p(paneldaten$b)
  if(cs[3]=="GDP_t_20") paneldaten$c <- log1p(paneldaten$c)
  if(cs[1]=="Pop") paneldaten$a <- log1p(paneldaten$a)
  if(cs[2]=="Pop") paneldaten$b <- log1p(paneldaten$b)
  if(cs[3]=="Pop") paneldaten$c <- log1p(paneldaten$c)
  
  #Estimate the model with youth ratio and the 3 controls
  modell <- pglm(ucdp_fatalities ~ YouthRatio+a+b+c, family="negbin",method="sann",
                 data=paneldaten, print.level=3,model = "within", effects="twoways")
  summary(modell)
  new_ts_0 <- rbind(new_ts_0,summary(modell)$estimate[2,3])
}

ts_0a <- unlist(ts_0)[!is.na(unlist(ts_0))&(unlist(ts_0)!=0)] #drop values where algorithm didn't converge


#How many of the 1,000 models are significant?
length(ts_0a[ts_0a > 1.96])/length(ts_0a)
length(ts_0a[ts_0a >= 0])/length(ts_0a) #how many are positive


# (2) Interaction YouthRatio*Education

new_ts_1 <- NULL

#If estimation fails to converge and loop aborts with error, repeat the following until length(ts_1)>1,000
for (i in 1:2000){
  paneldaten <- int_daten
  
  #Randomly select 3 out of the 28 possible control variables
  cs <- sample(controls,3,replace=F)
  paneldaten$a <- paneldaten[,names(paneldaten) %in% cs[1]]
  paneldaten$b <- paneldaten[,names(paneldaten) %in% cs[2]]
  paneldaten$c <- paneldaten[,names(paneldaten) %in% cs[3]]
  
  #Estimate the model with youth ratio and the 3 controls
  modell <- pglm(ucdp_fatalities ~ YouthRatio*EduM2429Secondary+a+b+c, family="negbin",method="nr",
                 data=paneldaten, print.level=3,model = "within", effects="twoways")
  summary(modell)
  new_ts_1 <- rbind(new_ts_1,summary(modell)$estimate[7,3])
}

ts_1a <- unlist(ts_1)[!is.na(unlist(ts_1))&(unlist(ts_1)!=0)]
ts_1a <- sample(ts_1a, 1000, F)


#How many of the 1,000 models are significant?
length(ts_1a[ts_1a > 1.96])/length(ts_1a)



# (3) Interaction YouthRatio*Unemployment

new_ts_2 <- NULL

#If estimation fails to converge and loop aborts with error, repeat the following until length(ts_2)>1,000
for (i in 1:2000){
  paneldaten <- int_daten
  
  #Randomly select 3 out of the 26 possible control variables
  cs <- sample(controls,3,replace=F)
  paneldaten$a <- paneldaten[,names(paneldaten) %in% cs[1]]
  paneldaten$b <- paneldaten[,names(paneldaten) %in% cs[2]]
  paneldaten$c <- paneldaten[,names(paneldaten) %in% cs[3]]
  
  #Estimate the model with youth ratio and the 3 controls
  modell <- pglm(ucdp_fatalities ~ YouthRatio*UnempMaleYouthILO+a+b+c, family="negbin",method="nr",
                 data=paneldaten, print.level=3,model = "within", effects="twoways")
  summary(modell)
  new_ts_2 <- rbind(new_ts_2,summary(modell)$estimate[7,3])
}

ts_2a <- unlist(ts_2)[!is.na(unlist(ts_2))&(unlist(ts_2)!=0)]
ts_2a <- sample(ts_2a, 1000, F)


#How many of the 1,000 models are significant?
length(ts_2a[ts_2a > 1.96])/length(ts_2a)


#########################################################################
#Export plot
png("Figure 4.png", width=4000, height=8000,res=600)
par(mfrow=c(3,1), oma=c(0,0,2.5,0), family="Garamond", cex.lab=1,cex.main=1.5)
hist(ts_0a, main = "Youth ratio", xlab = "t value", cex.lab=1,cex.main=1.5,xlim=c(-500,500),breaks=200)
abline(v=1.96,col="red", lty=3)
hist(ts_1a, main = "Youth ratio * Secondary education", xlab = "t value", cex.lab=1,cex.main=1.5,xlim=c(-20,20),breaks=1000)
abline(v=1.96,col="red", lty=3)
hist(ts_2a, main = "Youth ratio * Male youth unemployment", xlab = "t value", cex.lab=1,cex.main=1.5,xlim=c(-20,20),breaks=800)
abline(v=1.96,col="red", lty=3)
title("Figure 4: T values from 1,000 models with randomly \nselected control variables (DV = political violence)", outer = TRUE, cex=2)
dev.off()   



#########################################################################
## Out-of-sample-predictions
#########################################################################

set.seed(98765)

mse_1_list <- NULL
mse_2_list <- NULL

for(j in 1:100){
  #(1) Assign 10% of the data as testing and 90% as training
  s <- sample(1:1960,196,F)
  
  train_data <- int_daten[-s,]
  test_data <- int_daten[s,]
  
  #(2) Estimate the model using the training data - with and without youth bulge 
  train1 <- pglm(ucdp_fatalities ~ EduM2429Secondary+log(GDP_cap)+
                   GDP_growth+log(Pop), 
                 data=train_data, family=negbin, model = "within", method="nm", print.level=3)
  
  train2 <- pglm(ucdp_fatalities ~ YouthRatio+EduM2429Secondary+log(GDP_cap)+
                    GDP_growth+log(Pop), 
                  data=train_data, family=negbin, model = "within", method="nm", print.level=3)
  
  #(3) Use the models to predict fatalities in the test dataset
  
  #Without youth ratio
  real_values <- test_data$ucdp_fatalities
  fitted_values <- NULL
  for(i in 1:length(test_data$ucdp_fatalities)){
    fitted_values <- c(fitted_values, 
                       mean(train_data$ucdp_fatalities[train_data$Country==
                                                         test_data$Country[[i]]],na.rm=T)*
                         exp(train1$estimate[[2]]*(test_data$EduM2429Secondary[[i]]-mean(
                           train_data$EduM2429Secondary[train_data$Country==test_data$Country[[i]]],na.rm=T)))+
                         exp(train1$estimate[[3]]*(log(test_data$GDP_cap[[i]])-log(mean(
                           train_data$GDP_cap[train_data$Country==test_data$Country[[i]]],na.rm=T))))+
                         exp(train1$estimate[[4]]*(test_data$GDP_growth[[i]]-mean(
                           train_data$GDP_growth[train_data$Country==test_data$Country[[i]]],na.rm=T)))+
                         exp(train1$estimate[[5]]*(log(test_data$Pop[[i]])-log(mean(
                           train_data$GDP_cap[train_data$Country==test_data$Country[[i]]],na.rm=T)))))}
  fitted_values
  
  #Now with the youth bulge predictor
  fitted_values2 <- NULL
  for(i in 1:length(test_data$ucdp_fatalities)){
    fitted_values2 <- c(fitted_values2, 
                        mean(train_data$ucdp_fatalities[train_data$Country==
                                                          test_data$Country[[i]]],na.rm=T)*
                          exp(train2$estimate[[2]]*(test_data$YouthRatio[[i]]-mean(
                            train_data$YouthRatio[train_data$Country==test_data$Country[[i]]],na.rm=T)))+
                          exp(train2$estimate[[3]]*(test_data$EduM2429Secondary[[i]]-mean(
                            train_data$EduM2429Secondary[train_data$Country==test_data$Country[[i]]],na.rm=T)))+
                          exp(train2$estimate[[4]]*(log(test_data$GDP_cap[[i]])-log(mean(
                            train_data$GDP_cap[train_data$Country==test_data$Country[[i]]],na.rm=T))))+
                          exp(train2$estimate[[5]]*(test_data$GDP_growth[[i]]-mean(
                            train_data$GDP_growth[train_data$Country==test_data$Country[[i]]],na.rm=T)))+
                          exp(train2$estimate[[6]]*(log(test_data$Pop[[i]])-log(mean(
                            train_data$GDP_cap[train_data$Country==test_data$Country[[i]]],na.rm=T)))))}
  fitted_values2
  
  
  #Mean absolute error
  mse_1 <- (sum(abs(real_values-fitted_values),na.rm=T))/length(fitted_values)
  mse_2 <- (sum(abs(real_values-fitted_values2),na.rm=T))/length(fitted_values2)
  mse_1_list <- c(mse_1_list,mse_1)
  mse_2_list <- c(mse_2_list,mse_2)
}

mea_1a <- mean(mse_1_list[mse_1_list<=1000]) #exclude outliers where algorithm did not converge
mea_2a <- mean(mse_2_list[mse_2_list<=1000])



########################################
#Naive prediction based on mean values:

#Global average of training data as guess:
mse_1_list <- NULL

for(j in 1:100){
  #(1) Assign 10% of the data as testing and 90% as training
  s <- sample(1:1960,196,F)
  
  train_data <- int_daten[-s,]
  test_data <- int_daten[s,]
  
  #(2) Predict fatalities in the test dataset based on global average from the training data
  real_values <- test_data$ucdp_fatalities
  fitted_values <- NULL
  for(i in 1:length(test_data$ucdp_fatalities)){
    fitted_values <- c(fitted_values, 
                       mean(train_data$ucdp_fatalities,na.rm=T))}
  fitted_values
  
  #Mean absolute error
  mse_1 <- (sum(abs(real_values-fitted_values),na.rm=T))/length(fitted_values)
  mse_1_list <- c(mse_1_list,mse_1)
}

mse_1_list
mea_0 <- mean(mse_1_list[mse_1_list<=1000]) #exclude outliers where algorithm did not converge
mea_0


#With country-specific means:
mse_1_list <- NULL

for(j in 1:100){
  #(1) Assign 10% of the data as testing and 90% as training
  s <- sample(1:1960,196,F)
  
  train_data <- int_daten[-s,]
  test_data <- int_daten[s,]
  
  #(2) Predict fatalities in the test dataset based on country average from the training data
  
  real_values <- test_data$ucdp_fatalities
  fitted_values <- NULL
  for(i in 1:length(test_data$ucdp_fatalities)){
    fitted_values <- c(fitted_values, 
                       mean(train_data$ucdp_fatalities[train_data$Country==
                                                         test_data$Country[[i]]],na.rm=T))}
  fitted_values
  
  #Mean absolute error
  mse_1 <- (sum(abs(real_values-fitted_values),na.rm=T))/length(fitted_values)
  mse_1_list <- c(mse_1_list,mse_1)
}

mse_1_list
mea_0a <- mean(mse_1_list[mse_1_list<=1000]) #exclude outliers where algorithm did not converge
mea_0a

results <- data.frame("Model"=c("Forecasts based on global average of training data",
                                "Forecasts based on country-specific averages of training data",
                                "Model with GDP, population, GDP growth, education, and country fixed-effects",
                                "Same model + youth ratio"),
                      "Deviance" = c(mea_0, mea_0a, mea_1a, mea_2a))
write.csv(results, file="Table4.csv")


